# Whether the per-(geo_value, sensorize_date) regressions further down are
# fitted in parallel with parallel::mclapply (TRUE) or sequentially with
# dplyr::group_modify (FALSE).
PARCOMPUTE <- TRUE
# Number of worker processes passed to mclapply(mc.cores = ...).
# parallel::detectCores() is documented to return NA when the number of cores
# cannot be determined; fall back to a single core in that case instead of
# handing NA to mclapply.
N_CORE <- max(1L, parallel::detectCores(), na.rm = TRUE)
In this notebook, we repeat the analysis of 02_temporal_heterogeneity.Rmd for all of our core indicators.
# Core indicators to fetch from the API. The four vectors below are parallel:
# position i holds the data source, the signal name, the display name used in
# plot titles, and the target ("Cases" or "Deaths") that indicator is compared
# against.
# TODO: Add Google Symptoms "eventually"
source_names <- c(
  "doctor-visits",
  "fb-survey",
  "fb-survey",
  "hospital-admissions",
  "hospital-admissions"
)
signal_names <- c(
  "smoothed_adj_cli",
  "smoothed_cli",
  "smoothed_hh_cmnty_cli",
  "smoothed_adj_covid19_from_claims",
  "smoothed_adj_covid19_from_claims"
)
pretty_names <- c(
  "Doctor visits",
  "Facebook CLI",
  "Facebook CLI-in-community",
  "Hospitalizations",
  "Hospitalizations"
)
target_names <- c(
  "Cases",
  "Cases",
  "Cases",
  "Cases",
  "Deaths"
)

# Geographic resolution, taken from the notebook parameters.
geo_level <- params$geo_value

# Date range requested from the API; end_day is left as NULL.
start_day <- "2020-04-15"
end_day <- NULL

# Cache file for the downloaded signals, one file per geo level.
cache_fname <- sprintf(
  'cached_data/12_heterogeneity_core_indicators_%s.RDS',
  geo_level
)
# Load the indicator, case and death signals. They are downloaded from the API
# only when no cache file exists for this geo level; otherwise the cached copy
# is used. The cache is an unnamed list: (indicator signals, cases, deaths).
if (!file.exists(cache_fname)) {
  # Create the cache directory before downloading, so that the fetch is not
  # thrown away by saveRDS() failing on a missing directory.
  dir.create(dirname(cache_fname), showWarnings = FALSE, recursive = TRUE)

  # One data frame per indicator, in the same order as signal_names.
  # seq_along() keeps this a no-op if the indicator list is ever empty.
  df_signals <- lapply(seq_along(signal_names), function(i) {
    suppressWarnings(
      covidcast_signal(source_names[i], signal_names[i],
                       start_day, end_day,
                       geo_type = geo_level))
  })

  # Fetch USAFacts confirmed case incidence proportion (smoothed with 7-day
  # trailing average)
  df_cases <- suppressWarnings(
    covidcast_signal("usa-facts", "confirmed_7dav_incidence_prop",
                     start_day, end_day,
                     geo_type = geo_level))
  # Same for USAFacts death incidence proportion.
  df_deaths <- suppressWarnings(
    covidcast_signal("usa-facts", "deaths_7dav_incidence_prop",
                     start_day, end_day,
                     geo_type = geo_level))

  saveRDS(list(df_signals, df_cases, df_deaths), cache_fname)
} else {
  cached_data <- readRDS(cache_fname)
  df_signals <- cached_data[[1]]
  df_cases <- cached_data[[2]]
  df_deaths <- cached_data[[3]]
}
# Choose the locations to analyze.
#  - county: only counties whose USAFacts cumulative confirmed case count on
#    2020-11-01 is at least case_num.
#  - state: every geo_value present in the first indicator signal.
# Any other geo level is rejected here; previously geo_values was silently left
# undefined and the failure only surfaced later, at the first use.
case_num <- 500
if (geo_level == 'county') {
  county_case_counts <- suppressWarnings(
    covidcast_signal("usa-facts", "confirmed_cumulative_num",
                     '2020-11-01',
                     '2020-11-01',
                     # Explicit rather than relying on the function's default.
                     geo_type = geo_level))
  geo_values <- county_case_counts %>%
    filter(value >= case_num) %>%
    pull(geo_value)
} else if (geo_level == 'state') {
  geo_values <- unique(df_signals[[1]]$geo_value)
} else {
  stop(sprintf("Unsupported geo_level '%s': expected 'county' or 'state'.",
               geo_level))
}
## Fetched day 2020-11-01: 1, success, num_entries = 3192
# Fitting windows for the time-windowed sensorization, each given as
# c(first_offset, last_offset) in days relative to the date being sensorized.
# Every window ends 8 days before that date and starts 6, 7, 8, 9 or 10 weeks
# before it, i.e. (-42, -8), (-49, -8), ..., (-70, -8).
sensorize_time_ranges <- lapply(
  -7 * (6:10),
  function(first_offset) c(first_offset, -8)
)
# Wrap a table of sensorized values (columns geo_value, time_value and
# sensorized_value) in the column layout of a covidcast_signal data frame so it
# can be passed to covidcast_cor(). `geo_type` is recorded both as the geo_type
# attribute and in the metadata attribute.
to_sensorized_signal <- function(df, geo_type) {
  out <- df %>% transmute(
    geo_value = geo_value,
    signal = 'ind_sensorized',
    time_value = time_value,
    direction = NA,
    issue = lubridate::ymd('2020-11-01'),
    lag = NA,
    value = sensorized_value,
    stderr = NA,
    sample_size = NA,
    data_source = 'linear_sensorization'
  )
  attr(out, "geo_type") <- geo_type
  attr(out, "metadata") <- list(geo_type = geo_type)
  class(out) <- c("covidcast_signal", "data.frame")
  out
}

# For every core indicator:
#   1. join the indicator with its target (cases or deaths);
#   2. "spatially" sensorize it: one linear fit of target on indicator per
#      location over the whole period;
#   3. "temporally" sensorize it: for each location and date, a linear fit over
#      a trailing window (one pass per entry of sensorize_time_ranges), applied
#      to that date's indicator value;
#   4. compute per-day Spearman correlations with the target across locations
#      for the raw and sensorized versions, cache them under results/, and plot
#      them together.
for (ind_idx in seq_along(source_names)) {
  df_target <- switch(
    target_names[ind_idx],
    Cases = df_cases,
    Deaths = df_deaths,
    stop(sprintf("No matching dataframe for target %s.", target_names[ind_idx]))
  )

  # Indicator restricted to the selected locations, joined to the target.
  ind_df <- tibble(df_signals[[ind_idx]]) %>% filter(geo_value %in% geo_values)
  ind_target <- inner_join(ind_df, tibble(df_target),
                           by = c('geo_value', 'time_value')) %>%
    select(geo_value,
           time_value,
           indicator_value = value.x,
           target_value = value.y)

  # Spatial sensorization: one regression per location over all dates.
  # na.exclude + fitted() pads rows that lm() drops for missing values with NA,
  # so the fitted column always has one entry per input row.
  ind_global_sensorized <- ind_target %>%
    group_by(geo_value) %>%
    group_modify(~ {
      fit <- lm(target_value ~ indicator_value, data = .x,
                na.action = na.exclude)
      tibble(time_value = .x$time_value,
             indicator_value = .x$indicator_value,
             target_value = .x$target_value,
             sensorized_value = fitted(fit))
    }) %>%
    ungroup()
  # Record the actual geo level (this was hard-coded to 'county' before, which
  # mislabelled state-level runs).
  df_global_sensorized <- to_sensorized_signal(ind_global_sensorized, geo_level)

  base_cor_fname <- sprintf('results/12_base_cors_%s_%s_%s_%s.RDS',
                            geo_level,
                            source_names[ind_idx], signal_names[ind_idx],
                            target_names[ind_idx])
  sensorize_fname <- sprintf('results/12_sensorize_cors_%s_%s_%s_%s.RDS',
                             geo_level,
                             source_names[ind_idx], signal_names[ind_idx],
                             target_names[ind_idx])
  sensorize_val_fname <- sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                 geo_level,
                                 source_names[ind_idx], signal_names[ind_idx],
                                 target_names[ind_idx])
  # Make sure the output directory exists before any expensive computation.
  dir.create(dirname(base_cor_fname), showWarnings = FALSE, recursive = TRUE)

  # Baseline correlations: raw indicator and spatially sensorized indicator.
  if (!file.exists(base_cor_fname)) {
    # NOTE(review): the raw correlation uses the unfiltered signal, whereas the
    # sensorized versions only cover locations in geo_values -- confirm that
    # this difference is intended.
    df_cor_base_ind <- covidcast_cor(df_signals[[ind_idx]], df_target,
                                     by = 'time_value', method = 'spearman')
    df_cor_sensorized_ind <- covidcast_cor(df_global_sensorized, df_target,
                                           by = 'time_value',
                                           method = 'spearman')
    df_cor_base <- rbind(df_cor_base_ind, df_cor_sensorized_ind)
    df_cor_base$Indicator <- as.factor(c(rep('Raw', nrow(df_cor_base_ind)),
                                         rep('Sensorized (Spatial)',
                                             nrow(df_cor_sensorized_ind))))
    saveRDS(df_cor_base, base_cor_fname)
  } else {
    df_cor_base <- readRDS(base_cor_fname)
  }

  # Time-windowed sensorization. Both output files are read in the cached
  # branch, so recompute unless both of them exist.
  if (!(file.exists(sensorize_fname) && file.exists(sensorize_val_fname))) {
    sensorize_cors <- vector('list', length(sensorize_time_ranges))
    ind_target_sensorized_list <- vector('list', length(sensorize_time_ranges))
    for (outer_idx in seq_along(sensorize_time_ranges)) {
      sensorize_llim <- sensorize_time_ranges[[outer_idx]][1]
      sensorize_ulim <- sensorize_time_ranges[[outer_idx]][2]

      # First date that has a full window of history, up to the last date with
      # data (sensorize_llim is negative, so this moves start_day forward).
      min_sensorize_date <- lubridate::ymd(start_day) - sensorize_llim
      max_sensorize_date <- max(ind_target$time_value)
      sensorize_dates <- min_sensorize_date +
        0:as.numeric(max_sensorize_date - min_sensorize_date)

      # Lookup table pairing every sensorize_date with each time_value inside
      # its fitting window.
      window_offsets <- sensorize_llim:sensorize_ulim
      joiner_df <- tibble(
        sensorize_date = rep(sensorize_dates, each = length(window_offsets)),
        time_value = sensorize_date +
          rep(window_offsets, times = length(sensorize_dates)))

      # Attach to every observation the sensorize dates whose window contains
      # it. `by` is the actual dplyr argument; the previous `on=` was not
      # recognised, so the join silently fell back to a natural join (hence
      # the 'Joining, by = "time_value"' messages).
      ind_windowed <- ind_target %>%
        inner_join(joiner_df, by = 'time_value') %>%
        group_by(geo_value, sensorize_date)

      # One regression per (location, sensorize_date) window.
      if (!PARCOMPUTE) {
        ind_sensorized_lm <- ind_windowed %>%
          group_modify(
            ~ broom::tidy(lm(target_value ~ indicator_value, data = .x))
          ) %>%
          ungroup()
      } else {
        ind_sensorized_lm <- parallel::mclapply(
          group_split(ind_windowed),
          function(df) {
            # group_split() drops the grouping, so re-attach the group keys.
            broom::tidy(lm(target_value ~ indicator_value, data = df)) %>%
              mutate(geo_value = unique(df$geo_value),
                     sensorize_date = unique(df$sensorize_date))
          },
          mc.cores = N_CORE
        ) %>% bind_rows()
      }

      # One row per (location, sensorize_date) with intercept and slope columns.
      ind_sensorized_wide <- ind_sensorized_lm %>%
        select(geo_value, sensorize_date, term, estimate) %>%
        mutate(term = ifelse(term == '(Intercept)', 'intercept', 'slope')) %>%
        pivot_wider(id_cols = c(geo_value, sensorize_date),
                    names_from = term,
                    values_from = estimate)

      # Apply each date's fitted line to that date's indicator value.
      ind_target_sensorized <- ind_target %>%
        inner_join(ind_sensorized_wide,
                   by = c('time_value' = 'sensorize_date', 'geo_value')) %>%
        mutate(sensorized_value = intercept + indicator_value * slope)

      df_sensorized <- to_sensorized_signal(ind_target_sensorized, geo_level)
      df_cor_sensorized_ind <- covidcast_cor(df_sensorized, df_target,
                                             by = 'time_value',
                                             method = 'spearman')
      df_cor_sensorized_ind$Indicator <- sprintf('Sensorized (TS, %d:%d)',
                                                 sensorize_llim,
                                                 sensorize_ulim)
      sensorize_cors[[outer_idx]] <- df_cor_sensorized_ind
      ind_target_sensorized_list[[outer_idx]] <- ind_target_sensorized
    }
    saveRDS(sensorize_cors, sensorize_fname)
    saveRDS(ind_target_sensorized_list, sensorize_val_fname)
  } else {
    sensorize_cors <- readRDS(sensorize_fname)
    ind_target_sensorized_list <- readRDS(sensorize_val_fname)
  }

  # Combine all correlation series; shorten the labels and fix the legend order.
  df_cor <- bind_rows(df_cor_base, sensorize_cors)
  df_cor$Indicator <- stringr::str_replace(df_cor$Indicator,
                                           'Sensorized ',
                                           "")
  ts_levels <- vapply(sensorize_time_ranges,
                      function(x) sprintf("(TS, %d:%d)", x[[1]], x[[2]]),
                      character(1))
  df_cor$Indicator <- factor(df_cor$Indicator,
                             levels = c('Raw', "(Spatial)", ts_levels))

  plt <- ggplot(df_cor, aes(x = time_value, y = value)) +
    geom_line(aes(color = Indicator)) +
    labs(title = sprintf("Correlation between %s and %s",
                         pretty_names[ind_idx],
                         target_names[ind_idx]),
         subtitle = "Per day",
         x = "Date", y = "Correlation") +
    theme(legend.position = "bottom")
  print(plt)
}
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 287 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 281 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 281 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 287 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 287 row(s) containing missing values (geom_path).
# Quantiles of the fitted coefficient used as the y-axis limits of each plot.
QUANTS <- c(0.01, 0.99)
# TODO: Add more "core indicators"
# For every indicator/target pair, reload the cached time-windowed fits and, for
# each fitting window, print a summary of the fitted slopes and plot them over
# time (points) with the per-date median and median +/- MAD as lines.
# Only the sensorized-values file is read here; the target data frame and the
# correlation caches that used to be loaded in this loop were never used.
for (ind_idx in seq_along(source_names)) {
  sensorize_val_fname <- sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                 geo_level,
                                 source_names[ind_idx], signal_names[ind_idx],
                                 target_names[ind_idx])
  sensorized_vals <- readRDS(sensorize_val_fname)

  for (inner_idx in seq_along(sensorize_time_ranges)) {
    sv <- sensorized_vals[[inner_idx]]
    print(summary(sv$slope))
    slope_limits <- quantile(sv$slope, QUANTS, na.rm = TRUE)
    print(slope_limits)

    # NOTE(review): ylim() sets scale limits, so rows outside the quantile
    # range are dropped before stat_summary() runs (see the "Removed ... rows"
    # warnings); the median/MAD lines therefore describe the trimmed data.
    # Use coord_cartesian(ylim = ...) instead if they should cover all fits.
    plt <- ggplot(sv, aes(x = time_value, y = slope)) +
      geom_point(alpha = 0.1, size = 0.5) +
      geom_hline(yintercept = 0, colour = 'white') +
      # group = 1 puts all locations in a single group, so each summary is
      # computed per date and drawn as one line.
      stat_summary(aes(group = 1, colour = 'median'),
                   fun = median,
                   geom = "line") +
      stat_summary(aes(group = 1, colour = '+/- mad'),
                   fun = function(x) { median(x) + mad(x) },
                   geom = "line") +
      stat_summary(aes(group = 1, colour = '+/- mad'),
                   fun = function(x) { median(x) - mad(x) },
                   geom = "line") +
      scale_colour_manual(values = c("median" = "maroon",
                                     "+/- mad" = "darkgreen")) +
      labs(colour = '') +
      ggtitle(
        sprintf("Slope distribution for %s[%s], fitted on t in %d:%d",
                pretty_names[ind_idx],
                target_names[ind_idx],
                sensorize_time_ranges[[inner_idx]][1],
                sensorize_time_ranges[[inner_idx]][2])
      ) +
      ylim(slope_limits[[1]], slope_limits[[2]])
    print(plt)
  }
}
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -207823 0 1 352 2 49344442 512
## 1% 99%
## -8.183373 15.814039
## Warning: Removed 6068 rows containing non-finite values (stat_summary).
## Warning: Removed 6068 rows containing non-finite values (stat_summary).
## Warning: Removed 6068 rows containing non-finite values (stat_summary).
## Warning: Removed 6068 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -320534 0 1 183 2 49344442 265
## 1% 99%
## -7.00815 15.58148
## Warning: Removed 5603 rows containing non-finite values (stat_summary).
## Warning: Removed 5603 rows containing non-finite values (stat_summary).
## Warning: Removed 5603 rows containing non-finite values (stat_summary).
## Warning: Removed 5603 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -223.85639 -0.04753 0.84013 1.58293 2.52607 128.32528 157
## 1% 99%
## -6.248935 15.456388
## Warning: Removed 5281 rows containing non-finite values (stat_summary).
## Warning: Removed 5281 rows containing non-finite values (stat_summary).
## Warning: Removed 5281 rows containing non-finite values (stat_summary).
## Warning: Removed 5281 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -223.85639 0.03501 0.97259 1.73484 2.67309 128.32528 112
## 1% 99%
## -5.71044 15.57034
## Warning: Removed 5024 rows containing non-finite values (stat_summary).
## Warning: Removed 5024 rows containing non-finite values (stat_summary).
## Warning: Removed 5024 rows containing non-finite values (stat_summary).
## Warning: Removed 5024 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -223.8564 0.1251 1.1135 1.8766 2.8274 80.8365 92
## 1% 99%
## -5.193001 15.597722
## Warning: Removed 4794 rows containing non-finite values (stat_summary).
## Warning: Removed 4794 rows containing non-finite values (stat_summary).
## Warning: Removed 4794 rows containing non-finite values (stat_summary).
## Warning: Removed 4794 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1303.847 -1.374 0.575 2.642 4.127 8405.344 444
## 1% 99%
## -19.89650 40.66716
## Warning: Removed 2962 rows containing non-finite values (stat_summary).
## Warning: Removed 2962 rows containing non-finite values (stat_summary).
## Warning: Removed 2962 rows containing non-finite values (stat_summary).
## Warning: Removed 2962 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -216.785 -1.183 0.763 3.062 4.698 8405.344 233
## 1% 99%
## -17.55080 40.88737
## Warning: Removed 2629 rows containing non-finite values (stat_summary).
## Warning: Removed 2629 rows containing non-finite values (stat_summary).
## Warning: Removed 2629 rows containing non-finite values (stat_summary).
## Warning: Removed 2629 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -216.785 -1.023 0.975 3.543 5.386 311.780 125
## 1% 99%
## -15.62303 42.69872
## Warning: Removed 2399 rows containing non-finite values (stat_summary).
## Warning: Removed 2399 rows containing non-finite values (stat_summary).
## Warning: Removed 2399 rows containing non-finite values (stat_summary).
## Warning: Removed 2399 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -197.1008 -0.8469 1.2439 4.1345 6.1231 338.3233 93
## 1% 99%
## -14.39623 44.49256
## Warning: Removed 2247 rows containing non-finite values (stat_summary).
## Warning: Removed 2247 rows containing non-finite values (stat_summary).
## Warning: Removed 2247 rows containing non-finite values (stat_summary).
## Warning: Removed 2247 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -832.075 -0.665 1.521 4.970 6.923 4448.638 79
## 1% 99%
## -13.46018 46.51060
## Warning: Removed 2119 rows containing non-finite values (stat_summary).
## Warning: Removed 2119 rows containing non-finite values (stat_summary).
## Warning: Removed 2119 rows containing non-finite values (stat_summary).
## Warning: Removed 2119 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -151.51259 0.00013 0.31552 0.55582 0.91482 63.28586 184
## 1% 99%
## -1.399055 4.159723
## Warning: Removed 2688 rows containing non-finite values (stat_summary).
## Warning: Removed 2688 rows containing non-finite values (stat_summary).
## Warning: Removed 2688 rows containing non-finite values (stat_summary).
## Warning: Removed 2688 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -17.84620 0.03223 0.39043 0.62042 1.01508 63.26772 97
## 1% 99%
## -1.190915 4.053551
## Warning: Removed 2475 rows containing non-finite values (stat_summary).
## Warning: Removed 2475 rows containing non-finite values (stat_summary).
## Warning: Removed 2475 rows containing non-finite values (stat_summary).
## Warning: Removed 2475 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -12.53251 0.07068 0.46229 0.67891 1.09270 12.94531 68
## 1% 99%
## -0.9789953 3.9313443
## Warning: Removed 2322 rows containing non-finite values (stat_summary).
## Warning: Removed 2322 rows containing non-finite values (stat_summary).
## Warning: Removed 2322 rows containing non-finite values (stat_summary).
## Warning: Removed 2322 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -11.9703 0.1194 0.5312 0.7358 1.1576 19.2934 43
## 1% 99%
## -0.8288581 3.8135993
## Warning: Removed 2175 rows containing non-finite values (stat_summary).
## Warning: Removed 2175 rows containing non-finite values (stat_summary).
## Warning: Removed 2175 rows containing non-finite values (stat_summary).
## Warning: Removed 2175 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -9.6046 0.1726 0.6045 0.7916 1.2213 20.4073 39
## 1% 99%
## -0.6849835 3.7311353
## Warning: Removed 2055 rows containing non-finite values (stat_summary).
## Warning: Removed 2055 rows containing non-finite values (stat_summary).
## Warning: Removed 2055 rows containing non-finite values (stat_summary).
## Warning: Removed 2055 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -338.6237 -0.3298 0.3034 0.7798 1.3655 225.2022 196
## 1% 99%
## -9.87723 17.41612
## Warning: Removed 1768 rows containing non-finite values (stat_summary).
## Warning: Removed 1768 rows containing non-finite values (stat_summary).
## Warning: Removed 1768 rows containing non-finite values (stat_summary).
## Warning: Removed 1768 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -680.2869 -0.2446 0.3703 0.9293 1.4494 428.6766 125
## 1% 99%
## -7.547981 16.662617
## Warning: Removed 1609 rows containing non-finite values (stat_summary).
## Warning: Removed 1609 rows containing non-finite values (stat_summary).
## Warning: Removed 1609 rows containing non-finite values (stat_summary).
## Warning: Removed 1609 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -99.1135 -0.1784 0.4462 1.0308 1.5697 428.6766 101
## 1% 99%
## -6.677115 15.705571
## Warning: Removed 1521 rows containing non-finite values (stat_summary).
## Warning: Removed 1521 rows containing non-finite values (stat_summary).
## Warning: Removed 1521 rows containing non-finite values (stat_summary).
## Warning: Removed 1521 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -99.1135 -0.1203 0.5102 1.1019 1.6722 132.4731 86
## 1% 99%
## -5.732735 14.084465
## Warning: Removed 1448 rows containing non-finite values (stat_summary).
## Warning: Removed 1448 rows containing non-finite values (stat_summary).
## Warning: Removed 1448 rows containing non-finite values (stat_summary).
## Warning: Removed 1448 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -56.86037 -0.06857 0.58376 1.16484 1.77956 134.57789 65
## 1% 99%
## -5.090445 13.205784
## Warning: Removed 1363 rows containing non-finite values (stat_summary).
## Warning: Removed 1363 rows containing non-finite values (stat_summary).
## Warning: Removed 1363 rows containing non-finite values (stat_summary).
## Warning: Removed 1363 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -24.25239 -0.01499 0.00478 0.00363 0.03363 6.90200 196
## 1% 99%
## -0.5292977 0.4959266
## Warning: Removed 1768 rows containing non-finite values (stat_summary).
## Warning: Removed 1768 rows containing non-finite values (stat_summary).
## Warning: Removed 1768 rows containing non-finite values (stat_summary).
## Warning: Removed 1768 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -105.35003 -0.01229 0.00644 0.00486 0.03367 10.71691 125
## 1% 99%
## -0.3955613 0.3679982
## Warning: Removed 1615 rows containing non-finite values (stat_summary).
## Warning: Removed 1615 rows containing non-finite values (stat_summary).
## Warning: Removed 1615 rows containing non-finite values (stat_summary).
## Warning: Removed 1615 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -14.91943 -0.01012 0.00792 0.01361 0.03369 54.31979 101
## 1% 99%
## -0.3059589 0.3150041
## Warning: Removed 1514 rows containing non-finite values (stat_summary).
## Warning: Removed 1514 rows containing non-finite values (stat_summary).
## Warning: Removed 1514 rows containing non-finite values (stat_summary).
## Warning: Removed 1514 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -14.91943 -0.00780 0.00918 0.02134 0.03401 54.31979 86
## 1% 99%
## -0.2480163 0.2961774
## Warning: Removed 1448 rows containing non-finite values (stat_summary).
## Warning: Removed 1448 rows containing non-finite values (stat_summary).
## Warning: Removed 1448 rows containing non-finite values (stat_summary).
## Warning: Removed 1448 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -14.91943 -0.00621 0.01044 0.02375 0.03469 54.31979 65
## 1% 99%
## -0.2203199 0.2811966
## Warning: Removed 1367 rows containing non-finite values (stat_summary).
## Warning: Removed 1367 rows containing non-finite values (stat_summary).
## Warning: Removed 1367 rows containing non-finite values (stat_summary).
## Warning: Removed 1367 rows containing missing values (geom_point).
# Quantiles of the fitted coefficient used as the y-axis limits of each plot.
QUANTS <- c(0.01, 0.99)
# TODO: Add more "core indicators"
# For every indicator/target pair, reload the cached time-windowed fits and, for
# each fitting window, print a summary of the fitted intercepts and plot them
# over time (points) with the per-date median and median +/- MAD as lines.
# Only the sensorized-values file is read here; the target data frame and the
# correlation caches that used to be loaded in this loop were never used.
for (ind_idx in seq_along(source_names)) {
  sensorize_val_fname <- sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                 geo_level,
                                 source_names[ind_idx], signal_names[ind_idx],
                                 target_names[ind_idx])
  sensorized_vals <- readRDS(sensorize_val_fname)

  for (inner_idx in seq_along(sensorize_time_ranges)) {
    sv <- sensorized_vals[[inner_idx]]
    print(summary(sv$intercept))
    intercept_limits <- quantile(sv$intercept, QUANTS, na.rm = TRUE)
    print(intercept_limits)

    # NOTE(review): ylim() sets scale limits, so rows outside the quantile
    # range are dropped before stat_summary() runs (see the "Removed ... rows"
    # warnings); the median/MAD lines therefore describe the trimmed data.
    # Use coord_cartesian(ylim = ...) instead if they should cover all fits.
    plt <- ggplot(sv, aes(x = time_value, y = intercept)) +
      geom_point(alpha = 0.1, size = 0.5) +
      geom_hline(yintercept = 0, colour = 'white') +
      # group = 1 puts all locations in a single group, so each summary is
      # computed per date and drawn as one line.
      stat_summary(aes(group = 1, colour = 'median'),
                   fun = median,
                   geom = "line") +
      stat_summary(aes(group = 1, colour = '+/- mad'),
                   fun = function(x) { median(x) + mad(x) },
                   geom = "line") +
      stat_summary(aes(group = 1, colour = '+/- mad'),
                   fun = function(x) { median(x) - mad(x) },
                   geom = "line") +
      scale_colour_manual(values = c("median" = "maroon",
                                     "+/- mad" = "darkgreen")) +
      labs(colour = '') +
      ggtitle(
        sprintf("Intercept distribution for %s[%s], fitted on t in %d:%d",
                pretty_names[ind_idx],
                target_names[ind_idx],
                sensorize_time_ranges[[inner_idx]][1],
                sensorize_time_ranges[[inner_idx]][2])
      ) +
      ylim(intercept_limits[[1]], intercept_limits[[2]])
    print(plt)
  }
}
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -367.950 1.263 5.624 9.958 14.412 664.227
## 1% 99%
## -28.29520 78.85247
## Warning: Removed 5566 rows containing non-finite values (stat_summary).
## Warning: Removed 5566 rows containing non-finite values (stat_summary).
## Warning: Removed 5566 rows containing non-finite values (stat_summary).
## Warning: Removed 5566 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -284.503 1.198 5.396 9.254 13.775 515.448
## 1% 99%
## -26.42673 71.03101
## Warning: Removed 5342 rows containing non-finite values (stat_summary).
## Warning: Removed 5342 rows containing non-finite values (stat_summary).
## Warning: Removed 5342 rows containing non-finite values (stat_summary).
## Warning: Removed 5342 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -210.579 1.146 5.194 8.665 13.228 515.448
## 1% 99%
## -24.63681 64.90918
## Warning: Removed 5126 rows containing non-finite values (stat_summary).
## Warning: Removed 5126 rows containing non-finite values (stat_summary).
## Warning: Removed 5126 rows containing non-finite values (stat_summary).
## Warning: Removed 5126 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -179.272 1.073 4.998 8.162 12.740 515.448
## 1% 99%
## -23.13580 58.90146
## Warning: Removed 4916 rows containing non-finite values (stat_summary).
## Warning: Removed 4916 rows containing non-finite values (stat_summary).
## Warning: Removed 4916 rows containing non-finite values (stat_summary).
## Warning: Removed 4916 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -883.9557 0.9753 4.8382 7.7040 12.3400 515.4477
## 1% 99%
## -22.18828 54.31040
## Warning: Removed 4702 rows containing non-finite values (stat_summary).
## Warning: Removed 4702 rows containing non-finite values (stat_summary).
## Warning: Removed 4702 rows containing non-finite values (stat_summary).
## Warning: Removed 4702 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2046.899 3.183 6.936 10.176 13.373 1336.984
## 1% 99%
## -2.61321 55.25018
## Warning: Removed 2526 rows containing non-finite values (stat_summary).
## Warning: Removed 2526 rows containing non-finite values (stat_summary).
## Warning: Removed 2526 rows containing non-finite values (stat_summary).
## Warning: Removed 2526 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -325.827 3.211 6.861 9.734 13.025 379.835
## 1% 99%
## -3.871893 50.590459
## Warning: Removed 2400 rows containing non-finite values (stat_summary).
## Warning: Removed 2400 rows containing non-finite values (stat_summary).
## Warning: Removed 2400 rows containing non-finite values (stat_summary).
## Warning: Removed 2400 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -220.039 3.238 6.767 9.383 12.747 295.142
## 1% 99%
## -5.328427 46.385625
## Warning: Removed 2276 rows containing non-finite values (stat_summary).
## Warning: Removed 2276 rows containing non-finite values (stat_summary).
## Warning: Removed 2276 rows containing non-finite values (stat_summary).
## Warning: Removed 2276 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -220.039 3.269 6.708 9.080 12.467 192.950
## 1% 99%
## -6.555056 43.174783
## Warning: Removed 2156 rows containing non-finite values (stat_summary).
## Warning: Removed 2156 rows containing non-finite values (stat_summary).
## Warning: Removed 2156 rows containing non-finite values (stat_summary).
## Warning: Removed 2156 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -129.604 3.297 6.631 8.787 12.268 182.047
## 1% 99%
## -7.584376 39.912065
## Warning: Removed 2042 rows containing non-finite values (stat_summary).
## Warning: Removed 2042 rows containing non-finite values (stat_summary).
## Warning: Removed 2042 rows containing non-finite values (stat_summary).
## Warning: Removed 2042 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2021.753 -6.191 0.592 -1.347 5.881 4085.872
## 1% 99%
## -83.01052 55.78910
## Warning: Removed 2508 rows containing non-finite values (stat_summary).
## Warning: Removed 2508 rows containing non-finite values (stat_summary).
## Warning: Removed 2508 rows containing non-finite values (stat_summary).
## Warning: Removed 2508 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2021.7535 -7.6206 0.0497 -2.5988 4.8680 881.7360
## 1% 99%
## -79.32388 48.06342
## Warning: Removed 2380 rows containing non-finite values (stat_summary).
## Warning: Removed 2380 rows containing non-finite values (stat_summary).
## Warning: Removed 2380 rows containing non-finite values (stat_summary).
## Warning: Removed 2380 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -414.7469 -9.0173 -0.6101 -3.6149 4.0112 523.8832
## 1% 99%
## -74.07784 42.61765
## Warning: Removed 2256 rows containing non-finite values (stat_summary).
## Warning: Removed 2256 rows containing non-finite values (stat_summary).
## Warning: Removed 2256 rows containing non-finite values (stat_summary).
## Warning: Removed 2256 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -439.039 -10.214 -1.442 -4.597 3.265 325.233
## 1% 99%
## -69.74859 36.31845
## Warning: Removed 2132 rows containing non-finite values (stat_summary).
## Warning: Removed 2132 rows containing non-finite values (stat_summary).
## Warning: Removed 2132 rows containing non-finite values (stat_summary).
## Warning: Removed 2132 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -439.039 -11.263 -2.386 -5.551 2.602 266.368
## 1% 99%
## -67.10767 31.21081
## Warning: Removed 2018 rows containing non-finite values (stat_summary).
## Warning: Removed 2018 rows containing non-finite values (stat_summary).
## Warning: Removed 2018 rows containing non-finite values (stat_summary).
## Warning: Removed 2018 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -430.151 3.113 7.327 10.373 14.076 1257.251
## 1% 99%
## -14.57452 63.05076
## Warning: Removed 1576 rows containing non-finite values (stat_summary).
## Warning: Removed 1576 rows containing non-finite values (stat_summary).
## Warning: Removed 1576 rows containing non-finite values (stat_summary).
## Warning: Removed 1576 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3416.232 3.223 7.296 9.997 13.624 1257.251
## 1% 99%
## -12.37331 57.95222
## Warning: Removed 1488 rows containing non-finite values (stat_summary).
## Warning: Removed 1488 rows containing non-finite values (stat_summary).
## Warning: Removed 1488 rows containing non-finite values (stat_summary).
## Warning: Removed 1488 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3416.232 3.289 7.241 9.669 13.335 1257.251
## 1% 99%
## -10.94083 53.17620
## Warning: Removed 1422 rows containing non-finite values (stat_summary).
## Warning: Removed 1422 rows containing non-finite values (stat_summary).
## Warning: Removed 1422 rows containing non-finite values (stat_summary).
## Warning: Removed 1422 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -378.967 3.315 7.154 9.397 13.055 414.310
## 1% 99%
## -9.807924 49.112827
## Warning: Removed 1362 rows containing non-finite values (stat_summary).
## Warning: Removed 1362 rows containing non-finite values (stat_summary).
## Warning: Removed 1362 rows containing non-finite values (stat_summary).
## Warning: Removed 1362 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -378.967 3.330 7.110 9.166 12.840 414.310
## 1% 99%
## -9.353046 45.779735
## Warning: Removed 1301 rows containing non-finite values (stat_summary).
## Warning: Removed 1301 rows containing non-finite values (stat_summary).
## Warning: Removed 1301 rows containing non-finite values (stat_summary).
## Warning: Removed 1301 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -34.24803 0.02933 0.12215 0.23399 0.29759 164.04083
## 1% 99%
## -0.6967936 2.4419088
## Warning: Removed 1576 rows containing non-finite values (stat_summary).
## Warning: Removed 1576 rows containing non-finite values (stat_summary).
## Warning: Removed 1576 rows containing non-finite values (stat_summary).
## Warning: Removed 1576 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -85.11398 0.03605 0.12303 0.22730 0.28658 164.04083
## 1% 99%
## -0.5450216 2.0106879
## Warning: Removed 1494 rows containing non-finite values (stat_summary).
## Warning: Removed 1494 rows containing non-finite values (stat_summary).
## Warning: Removed 1494 rows containing non-finite values (stat_summary).
## Warning: Removed 1494 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -85.11398 0.03828 0.12252 0.21778 0.27649 164.04083
## 1% 99%
## -0.4375933 1.7616476
## Warning: Removed 1419 rows containing non-finite values (stat_summary).
## Warning: Removed 1419 rows containing non-finite values (stat_summary).
## Warning: Removed 1419 rows containing non-finite values (stat_summary).
## Warning: Removed 1419 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -26.17654 0.03974 0.12271 0.20883 0.26888 164.04083
## 1% 99%
## -0.3560647 1.5464529
## Warning: Removed 1359 rows containing non-finite values (stat_summary).
## Warning: Removed 1359 rows containing non-finite values (stat_summary).
## Warning: Removed 1359 rows containing non-finite values (stat_summary).
## Warning: Removed 1359 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -26.17654 0.04113 0.12279 0.20343 0.26191 164.04083
## 1% 99%
## -0.3104566 1.3821669
## Warning: Removed 1306 rows containing non-finite values (stat_summary).
## Warning: Removed 1306 rows containing non-finite values (stat_summary).
## Warning: Removed 1306 rows containing non-finite values (stat_summary).
## Warning: Removed 1306 rows containing missing values (geom_point).